#

# title: "Class 05: Data Visualization with GGPLOT"
# author: "Kira Jung (PID A16026398)"
# date: "April 24, 2023"

## Week 3 Data Visualization With ggplot2

# Install the package once with install.packages("ggplot2"), then load it in each session with library(ggplot2).

# Q1: For which phases is data visualization important in our scientific workflows?
# A1: All of the above (communication of results, exploratory data analysis, detection of outliers).

# Q2: True or False? The ggplot2 package comes already installed with R?
# A2: FALSE.

# Q3: Which plot types are typically NOT used to compare distributions of numeric variables?
# A3: Network graphs

# Q4: Which statement about data visualization with ggplot2 is incorrect?
# A4: "ggplot2 is the only way to create plots in R".

library(ggplot2)
# Inspect the built-in `cars` data. View() opens a data viewer window, which
# is only useful in an interactive session, so it is skipped when the script
# runs non-interactively (e.g. via Rscript or when knitting).
if (interactive()) {
  View(cars)
}

# Base R scatter plot of the data frame, for comparison with ggplot2.
plot(cars)

# ggplot2 version: data + aesthetic mapping + point geom.
ggplot(data = cars) +
  aes(x = speed, y = dist) +
  geom_point()

# Store the same plot in `p` so further layers can be added to it below.
p <- ggplot(data = cars) +
  aes(x = speed, y = dist) +
  geom_point()

# Add a line geom with geom_line()
p + geom_line()

# Add a trendline close to the data
p + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same plot with a linear-model fit instead of the default smoother
p + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# Read in our drug expression data (delimited text file fetched from the URL)
url <- "https://bioboot.github.io/bimm143_S20/class-material/up_down_expression.txt"
genes <- read.delim(url)

# Preview the first rows
head(genes)
##         Gene Condition1 Condition2      State
## 1      A4GNT -3.6808610 -3.4401355 unchanging
## 2       AAAS  4.5479580  4.3864126 unchanging
## 3      AASDH  3.7190695  3.4787276 unchanging
## 4       AATF  5.0784720  5.0151916 unchanging
## 5       AATK  0.4711421  0.5598642 unchanging
## 6 AB015752.4 -3.6808610 -3.5921390 unchanging

# View() opens a data viewer window; only do that in an interactive session
# so the script also runs non-interactively.
if (interactive()) {
  View(genes)
}

# Q. How many genes are in the dataset?
# A. 5196 (the number of rows).
nrow(genes)
## [1] 5196

# Q. What are the column names, and how many columns are there?
# A. Four columns: Gene, Condition1, Condition2, State.
colnames(genes)
## [1] "Gene"       "Condition1" "Condition2" "State"
ncol(genes)
## [1] 4

# Q. How many genes are 'up' regulated?
# A. 127 (count of each value in the State column).
table(genes[["State"]])
## 
##       down unchanging         up 
##         72       4997        127

# Q. What fraction of all genes is up-regulated?
# A. About 2.4% (each State count as a percentage of all rows, 2 decimals).
round(100 * (table(genes[["State"]]) / nrow(genes)), digits = 2)
## 
##       down unchanging         up 
##       1.39      96.17       2.44
# First plot attempt: Condition1 against Condition2, points coloured by State.
g <- ggplot(genes, aes(x = Condition1, y = Condition2, colour = State)) +
  geom_point()

# Apply a manual colour palette, a title and axis labels, and the
# black-and-white theme.
g +
  scale_colour_manual(values = c("blue", "gray", "red")) +
  labs(
    title = "Gene expression changes",
    x = "Control(no drug)",
    y = "Condition 2"
  ) +
  theme_bw()

# 7 - Optional Portion
# The gapminder data is read here directly from a tab-separated file hosted
# in the package's GitHub repository. The package itself can be installed with:
# install.packages("gapminder")
url <- "https://raw.githubusercontent.com/jennybc/gapminder/master/inst/extdata/gapminder.tsv"
gapminder <- read.delim(url)

# dplyr provides filter(); install it once if needed:
# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the rows recorded for the year 2007.
gapminder_2007 <- filter(gapminder, year == 2007)

# Basic scatter plot of the 2007 data: GDP per capita against life expectancy.
ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5)

# Four variables in one plot: colour by continent, point size by population.
ggplot(
  gapminder_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.5)

# Colour mapped to the numeric population variable instead of continent.
ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, colour = pop)) +
  geom_point(alpha = 0.8)

# Size mapped to population, with scale_size_area() so point area reflects
# the population differences (largest point capped at size 10).
ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(alpha = 0.5) +
  scale_size_area(max_size = 10)

# Q. Final 1957 and 2007 gapminder plots side by side.
# dplyr is already attached earlier in this script, so it is not reloaded here.
# NOTE: despite its name, gapminder_1957 holds the rows for BOTH 1957 and
# 2007; facet_wrap(~year) below draws one panel per year.
gapminder_1957 <- gapminder %>%
  filter(year %in% c(1957, 2007))

ggplot(gapminder_1957) +
  geom_point(
    aes(x = gdpPercap, y = lifeExp, color = continent, size = pop),
    alpha = 0.7
  ) +
  scale_size_area(max_size = 10) +
  facet_wrap(~year)

# 8 - Optional Bar Charts Section
# The five most populous countries in 2007, ordered from largest to smallest.
# slice_max() replaces the superseded top_n(); it already returns rows in
# descending order of `pop`, so no separate arrange() step is needed.
gapminder_top5 <- gapminder %>%
  filter(year == 2007) %>%
  slice_max(pop, n = 5)

# Bar chart of population per country. The geom has to be added to a ggplot()
# call: evaluated on its own, geom_col() only prints a description of the
# layer instead of drawing a plot.
ggplot(gapminder_top5) +
  geom_col(aes(x = country, y = pop))
# Q. Bar chart showing life expectancy of the 5 biggest countries by
# population in 2007.
ggplot(data = gapminder_top5) +
  geom_col(mapping = aes(x = country, y = lifeExp))

# Population bar chart with bars filled by continent (a categorical variable).
ggplot(data = gapminder_top5) +
  geom_col(mapping = aes(x = country, y = pop, fill = continent))

# Population bar chart with bars filled by life expectancy (a numeric
# variable).
ggplot(data = gapminder_top5) +
  geom_col(mapping = aes(x = country, y = pop, fill = lifeExp))

# Q. gapminder_top5 bar chart ordered by population.
# Uses gapminder_top5 (top five countries by 2007 population) built earlier in
# this section. Bars are ordered from largest to smallest population and
# filled by country.
# The outline colour is a constant, so it is set outside aes(); the fill
# legend is switched off with guides() rather than a second `fill` inside
# aes(), which triggered a "Duplicated aesthetics" warning.
ggplot(gapminder_top5) +
  geom_col(
    aes(x = reorder(country, -pop), y = pop, fill = country),
    col = "gray30"
  ) +
  guides(fill = "none")

# Flipping Bar Charts
# Preview the first six rows of the built-in USArrests data.
head(USArrests, n = 6L)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
# The flipped-chart code below is left disabled (commented out):
# USArrests$State <- rownames(USArrests)
# ggplot(USArrests) + aes(x=reorder(State, Murder), y=Murder) + geom_col() + coord_flip()
# ggplot(USArrests) + aes(x=reorder(State, Murder), y=Murder) + geom_point() + geom_segment(aes(x=State, xend=State, y=0, yend=Murder), color="blue") + coord_flip()

# 9 - Animation
# install.packages("gifski")
# install.packages("gganimate")

library(gapminder)
## 
## Attaching package: 'gapminder'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     gapminder
library(gganimate)

# Animated gapminder bubble chart: GDP per capita (log10 x axis) against life
# expectancy, one facet per continent, points sized by population and
# coloured per country using the `country_colors` palette.
ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  # gganimate layers: animate over `year` and draw a short wake behind the
  # moving points.
  transition_time(year) +
  shadow_wake(wake_length = 0.1, alpha = FALSE)

# 10 - Combining Plots
# Four independent plots of the built-in mtcars data. They are only stored
# here; the patchwork lines that would arrange them are left commented out.

# install.packages("patchwork")
# library(patchwork)

# Scatter plot: mpg against disp.
p1 <- ggplot(data = mtcars) +
  geom_point(mapping = aes(x = mpg, y = disp))
# Box plots of disp, one box per gear value.
p2 <- ggplot(data = mtcars) +
  geom_boxplot(mapping = aes(x = gear, y = disp, group = gear))
# Smoothed trend of qsec against disp.
p3 <- ggplot(data = mtcars) +
  geom_smooth(mapping = aes(x = disp, y = qsec))
# Bar chart of counts per carb value.
p4 <- ggplot(data = mtcars) +
  geom_bar(mapping = aes(x = carb))

# (p1 | p2 | p3) / p4

# Session Info
# Print the R version, platform, locale, and the attached/loaded package
# versions for this run (captured output follows) to aid reproducibility.
sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gganimate_1.0.8 gapminder_1.0.0 dplyr_1.1.2     ggplot2_3.4.2  
## 
## loaded via a namespace (and not attached):
##  [1] bslib_0.4.2       compiler_4.2.3    pillar_1.9.0      jquerylib_0.1.4  
##  [5] prettyunits_1.1.1 progress_1.2.2    tools_4.2.3       digest_0.6.31    
##  [9] lattice_0.21-8    nlme_3.1-162      jsonlite_1.8.4    evaluate_0.20    
## [13] lifecycle_1.0.3   tibble_3.2.1      gtable_0.3.3      mgcv_1.8-42      
## [17] pkgconfig_2.0.3   rlang_1.1.0       Matrix_1.5-4      cli_3.6.1        
## [21] rstudioapi_0.14   yaml_2.3.7        xfun_0.39         fastmap_1.1.1    
## [25] withr_2.5.0       knitr_1.42        hms_1.1.3         generics_0.1.3   
## [29] vctrs_0.6.2       sass_0.4.5        grid_4.2.3        tidyselect_1.2.0 
## [33] glue_1.6.2        R6_2.5.1          gifski_1.6.6-1    fansi_1.0.4      
## [37] rmarkdown_2.21    tweenr_2.0.2      farver_2.1.1      magrittr_2.0.3   
## [41] splines_4.2.3     scales_1.2.1      htmltools_0.5.5   colorspace_2.1-0 
## [45] labeling_0.4.2    utf8_1.2.3        stringi_1.7.12    munsell_0.5.0    
## [49] cachem_1.0.7      crayon_1.5.2